source_from_github(repositoy = "DEG_functions",version = "0.2.53")
ℹ SHA-1 hash of file is 8ed4e9017cead897a7d352226a5413f1b5fc0cb7
acc1_cancer_cells = readRDS("./Data/acc1_cancer_cells_2500features_integrated_V5.RDS")
all_acc_cancer_cells = readRDS("./Data/acc_cancer_cells_V5_integrated_primary.RDS")
acc_all_cells = readRDS("./Data/acc_tpm_nCount_mito_no146_15k_with_ACC1_.RDS")
acc_cancerCells_noACC1 = subset(all_acc_cancer_cells,subset = patient.ident != "HMSC")
luminal_pathways = c("CHARAFE_BREAST_CANCER_LUMINAL_VS_BASAL_DN","CHARAFE_BREAST_CANCER_LUMINAL_VS_BASAL_UP","CHARAFE_BREAST_CANCER_LUMINAL_VS_MESENCHYMAL_DN","CHARAFE_BREAST_CANCER_LUMINAL_VS_MESENCHYMAL_UP","HUPER_BREAST_BASAL_VS_LUMINAL_DN","LIM_MAMMARY_LUMINAL_PROGENITOR_UP","SMID_BREAST_CANCER_LUMINAL_B_UP" )
# add luminal pathwaysx
luminal_gs = msigdbr(species = "Homo sapiens") %>% as.data.frame() %>% dplyr::filter(gs_name %in% luminal_pathways)%>% dplyr::distinct(gs_name, gene_symbol) %>% as.data.frame()
DimPlot(all_acc_cancer_cells,group.by = "patient.ident",label = T)
FeaturePlot(object = all_acc_cancer_cells,features = c("MYB"),pt.size = 2)
FeaturePlot(object = all_acc_cancer_cells,features = c("kaye_acc_score"),pt.size = 1)
pdf("./Figures/kaye_acc_score_AllCancerCells.pdf")
FeaturePlot(object = all_acc_cancer_cells,features = c("kaye_acc_score"),pt.size = 1)
dev.off()
enrichment_analysis(acc_deg,background = VariableFeatures(all_acc_cancer_cells),fdr_Cutoff = 0.01,ident.1 = "HMSC",ident.2 = "ACC",show_by = 1)
Welcome to enrichR
Checking connection ...
Enrichr ... Connection is Live!
FlyEnrichr ... Connection is available!
WormEnrichr ... Connection is available!
YeastEnrichr ... Connection is available!
FishEnrichr ... Connection is available!
hallmark_name = "GO_MITOTIC_CELL_CYCLE"
genesets =getGmt("./Data/h.all.v7.0.symbols.pluscc.gmt")
acc_cancerCells_noACC1 = ScaleData(object = acc_cancerCells_noACC1,features = VariableFeatures(acc_cancerCells_noACC1,assay = "integrated"))
geneIds= genesets[[hallmark_name]]@geneIds %>% intersect(VariableFeatures(acc_cancerCells_noACC1,assay = "integrated"))
score <- apply(acc_cancerCells_noACC1@assays$integrated@scale.data[geneIds,],2,mean)
acc_cancerCells_noACC1=AddMetaData(acc_cancerCells_noACC1,score,hallmark_name)
acc1_cancer_cells = FindVariableFeatures(object = acc1_cancer_cells,nfeatures = 15000)
acc1_cancer_cells = ScaleData(object = acc1_cancer_cells,features = VariableFeatures(acc1_cancer_cells))
geneIds= genesets[[hallmark_name]]@geneIds %>% intersect(VariableFeatures(acc1_cancer_cells))
score <- apply(acc1_cancer_cells@assays$integrated@scale.data[geneIds,],2,mean)
acc1_cancer_cells=AddMetaData(acc1_cancer_cells,score,hallmark_name)
acc_cc_scores = FetchData(object = acc_cancerCells_noACC1,vars = "GO_MITOTIC_CELL_CYCLE")
hmsc_cc_scores = FetchData(object = acc1_cancer_cells,vars = "GO_MITOTIC_CELL_CYCLE")
distributions_plt = ggplot() +
geom_density(aes(GO_MITOTIC_CELL_CYCLE, fill = "ACC"), alpha = .2, data = acc_cc_scores) +
geom_density(aes(GO_MITOTIC_CELL_CYCLE, fill = "HMSC"), alpha = .2, data = hmsc_cc_scores) +
scale_fill_manual(name = "Dataset", values = c(ACC = "red", HMSC = "green"))+ geom_vline(aes(xintercept=0.3),
color="blue", linetype="dashed", size=1) +ggtitle("'GO_MITOTIC_CELL_CYCLE' score distribution")
print_tab(plt = distributions_plt,title = "score distribution",subtitle_num = 3)
NA
hmsc_cc_cells = (sum(acc1_cancer_cells@meta.data[[hallmark_name]]> 0.3) /ncol(acc1_cancer_cells)) %>% round(digits = 3)*100
acc_cc_cells = (sum(acc_cancerCells_noACC1@meta.data[[hallmark_name]]> 0.3)/ncol(acc_cancerCells_noACC1)) %>% round(digits = 3)*100
df = data.frame(Dataset = c("HMSC","ACC"), cycling_cells_percentage = c(hmsc_cc_cells,acc_cc_cells))
cycling_cells_plt = ggplot(data=df, aes(x=Dataset, y=cycling_cells_percentage)) +
geom_text(aes(label=cycling_cells_percentage), vjust=0, color="black", size=3.5)+
geom_bar(stat="identity")+ylab(" % cycling cells")+
geom_bar(stat="identity", fill="steelblue")+
theme_minimal() + ggtitle("Cycling cells count")
print_tab(plt = cycling_cells_plt,title = "# cycling cells",subtitle_num = 3)
NA
pdf(file = "./Figures/CC_distributions.pdf")
distributions_plt
dev.off()
null device
1
pdf(file = "./Figures/cycling_cells.pdf")
cycling_cells_plt
dev.off()
null device
1
print_tab(plt =
FeaturePlot(all_acc_cancer_cells,features = c("MKI67","CDK1","MCM2","CDC20"))
,title = "CC genes",subtitle_num = 3)
NA
#add score to all acc cancer cells
# geneIds= genesets[[hallmark_name]]@geneIds %>% intersect(VariableFeatures(all_acc_cancer_cells,assay = "integrated"))
# score <- apply(all_acc_cancer_cells@assays$integrated@scale.data[geneIds,],2,mean)
#add score to all acc cancer cells
cc_all = c(acc_cancerCells_noACC1$GO_MITOTIC_CELL_CYCLE, acc1_cancer_cells$GO_MITOTIC_CELL_CYCLE) %>% as.data.frame()
all_acc_cancer_cells=AddMetaData(all_acc_cancer_cells,cc_all,hallmark_name)
#filter:
all_acc_cancer_cells_ccFiltered=all_acc_cancer_cells[,all_acc_cancer_cells@meta.data[[hallmark_name]]< 0.3]
min_threshold = min(all_acc_cancer_cells$GO_MITOTIC_CELL_CYCLE)
max_threshold = max(all_acc_cancer_cells$GO_MITOTIC_CELL_CYCLE)
library(viridis)
Loading required package: viridisLite
print_tab(plt = FeaturePlot(object = all_acc_cancer_cells,features = hallmark_name) + ggtitle("Before cc filtering") &
scale_color_gradientn(colours = plasma(n = 10, direction = -1), limits = c(min_threshold, max_threshold))
,title = "Before CC filtering",subtitle_num = 3)
Scale for ‘colour’ is already present. Adding another scale for ‘colour’, which will replace the existing scale.
print_tab(plt =
FeaturePlot(object = all_acc_cancer_cells_ccFiltered,features = hallmark_name) + ggtitle("After cc filtering") &
scale_color_gradientn(colours = plasma(n = 10, direction = -1), limits = c(min_threshold, max_threshold))
,title = "After CC filtering" ,subtitle_num = 3)
Scale for ‘colour’ is already present. Adding another scale for ‘colour’, which will replace the existing scale.
NA
all_acc_cancer_cells_ccFiltered = SetIdent(all_acc_cancer_cells_ccFiltered, value ="patient.ident")
acc_deg <-
FindMarkers(
all_acc_cancer_cells_ccFiltered,
ident.1 = "HMSC",
features = VariableFeatures(all_acc_cancer_cells_ccFiltered),
densify = T,
verbose = T,
slot = "data",
mean.fxn = function(x) {
return(log(rowMeans(x) + 1,base = 2)) # change func to calculate logFC in log space data (default to exponent data)
},
assay = "RNA"
)
print_tab(plt = enrichment_analysis(acc_deg,background = VariableFeatures(all_acc_cancer_cells_ccFiltered),fdr_Cutoff = 0.01,ident.1 =
"HMSC",ident.2 = "ACC",show_by = 1)
,title = "Enrichment after filtering",subtitle_num = 3)
### Enrichment after filtering {.unnumbered }
NA
library(hypeR)
genesets <- msigdb_download("Homo sapiens",category="H") %>% append( msigdb_download("Homo sapiens",category="C2",subcategory = "CP:KEGG"))
ranked_vec = acc_deg[,"avg_log2FC"]%>% setNames(rownames(acc_deg)) %>% na.omit() # make named vector
hyp_obj <-hypeR_fgsea(signature = ranked_vec,genesets = genesets,up_only = F)
plt = hyp_dots(hyp_obj,merge = F)
plt1 = plt$up+ aes(size=nes)+ggtitle("up in HMSC")
plt2 = plt$dn+ aes(size=abs(nes))+ggtitle("up in ACC")
plt3 = plt1+plt2
plt3
pdf(file = "./Figures/ACC_vs_HMSC_GSEA.pdf",width = 13,height = 6)
plt3
dev.off()
volcano_plot(de_genes = acc_deg,fdr_cutoff = 0.05,fc_cutoff = 2, ident1 = "HMSC",ident2 = "ACC",top_genes_text = 4)
all_acc_cancer_cells_ccFiltered = SetIdent(all_acc_cancer_cells_ccFiltered, value ="patient.ident")
acc_deg <-
FindMarkers(
all_acc_cancer_cells_ccFiltered,
ident.1 = "HMSC",
features = VariableFeatures(all_acc_cancer_cells_ccFiltered),
densify = T,
verbose = T,
slot = "data",
mean.fxn = function(x) {
return(rowMeans(x) + 1) # change func to calculate logFC in log space data (default to exponent data)
},
assay = "RNA"
)
pdf("./Figures/volcano_plot_ACC_VS_HMSC.pdf")
volcano_plot(de_genes = acc_deg,fdr_cutoff = 0.05,ident1 = "HMSC",ident2 = "ACC",top_genes_text = 3,log2fc_cutoff = 0.5)
dev.off()
pdf("./Figures/Enrichment_analysis_ACC_VS_HMSC.pdf")
enrichment_analysis(acc_deg,background = VariableFeatures(all_acc_cancer_cells_ccFiltered),fdr_Cutoff = 0.01,ident.1 =
"HMSC",ident.2 = "ACC",show_by = 1)
dev.off()
top_hmsc_genes = acc_deg %>% dplyr::filter(avg_log2FC > 0) %>% slice_min(n = 10,order_by = p_val_adj) %>% rownames()
top_acc_genes = acc_deg %>% dplyr::filter(avg_log2FC < 0) %>% slice_min(n = 10,order_by = p_val_adj) %>% rownames()
all_top_deg = c(top_hmsc_genes,top_acc_genes)
all_acc_cancer_cells_ccFiltered$cancer_type = all_acc_cancer_cells_ccFiltered$patient.ident %>% gsub(pattern = "ACC.*",replacement = "ACC")
cancer_type = FetchData(object = all_acc_cancer_cells_ccFiltered, vars = "cancer_type")
# col_list = list(circlize::colorRamp2(c(0, 1), c("white", "red"))); names(col_list) = colnames(all_metagenes)[i]
column_ha = HeatmapAnnotation(df = cancer_type)
data = FetchData(object = all_acc_cancer_cells_ccFiltered,vars = all_top_deg,slot = "scale.data") %>% t()
print(ComplexHeatmap::Heatmap(data,show_column_names = F,row_names_gp = grid::gpar(fontsize = 7),cluster_rows = F, ,name = "Z-score expression",cluster_columns = F,top_annotation = column_ha))
NA
NA
# create expression matrix of acc + normal cells + HMSC seurat integrated
acc_all_cells_noAcc1 = subset(acc_all_cells, subset = patient.ident != "ACC1")
acc_expr = acc_all_cells_noAcc1@assays$RNA@data %>% as.data.frame()
hmsc_expr = acc.combined@assays$integrated@data %>% as.data.frame()
acc_expr = acc_expr [ rownames(hmsc_expr),]
all_expr = cbind(acc_expr,hmsc_expr)
# create annotation
acc_annotation_integrated = as.data.frame(acc_all_cells@meta.data[,"cell.type",drop = F])
acc_annotation_integrated = acc_annotation_integrated[colnames(all_expr),,drop = F]
acc_annotation_integrated = acc_annotation_integrated %>% rownames_to_column("orig.ident")
# #write expression and annotation
# write.table(acc_annotation_integrated, "./Data/inferCNV/acc_annotation_integrated.txt", append = FALSE,
# sep = "\t", dec = ".",row.names = FALSE, col.names = F)
#
#
# write.table(all_expr, "./Data/inferCNV/all.4icnv_integrated.txt", append = FALSE,
# sep = "\t", dec = ".",row.names = T, col.names = T)
trace(infercnv::run,edit = T) # to skip normalization, change to skip_past = 4 (https://github.com/broadinstitute/infercnv/issues/346)
Tracing function "run" in package "infercnv"
[1] "run"
infercnv_obj = CreateInfercnvObject(raw_counts_matrix="./Data/inferCNV/all.4icnv_integrated.txt",
annotations_file="./Data/inferCNV/acc_annotation_integrated.txt",
delim="\t",gene_order_file="./Data/inferCNV/gencode_v19_gene_pos.txt"
,ref_group_names=c("CAF", "Endothelial", "WBC")) #groups of normal cells
infercnv_obj_default = infercnv::run(infercnv_obj, cutoff=1, out_dir='./Data/inferCNV/infercnv_intergrated_output',
cluster_by_groups=T, plot_steps=FALSE,
denoise=TRUE, HMM=FALSE, no_prelim_plot=TRUE,
png_res=300)
untrace(infercnv::run)
trace(infercnv:::get_group_color_palette ,edit = T) # change "Set3" to "Set1" for more distinguishable colors
plot_cnv(infercnv_obj_default, output_format = "png", write_expr_matrix = FALSE,out_dir = "./Data/inferCNV/infercnv_intergrated_output",png_res =800,obs_title = "Malignant cells",ref_title = "Normal cells",contig_cex = 2, title = "Copy number variation")
untrace(infercnv:::get_group_color_palette)
print_tab(plt = knitr::include_graphics("./Data/inferCNV/infercnv_intergrated_output/infercnv.png")
,title = "CNV plot",subtitle_num = 3)
NA
library(limma)
smoothed=apply(infercnv_obj_default@expr.data,2,tricubeMovingAverage, span=0.01)
cnsig=sqrt(apply((smoothed-1)^2,2,mean))
acc_all_cells <- AddMetaData(object = acc_all_cells, metadata = cnsig, col.name = "copynumber")
acc_all_cells = SetIdent(object = acc_all_cells,value = "cell.type")
print_tab(plt = FeaturePlot(acc_all_cells, "copynumber",pt.size = 1,label = T,repel = T)+
scale_colour_gradientn(colours=c("white","lightblue","orange","red","darkred"))
,title = "CNV UMAP",subtitle_num = 3)
Scale for ‘colour’ is already present. Adding another scale for ‘colour’, which will replace the existing scale.
NA
acc1_cancer_cells = FindClusters(object = acc1_cancer_cells,verbose = F,resolution = 0.5)
DimPlot(object = acc1_cancer_cells,pt.size = 2)
FeaturePlot(object = acc1_cancer_cells,features = c("MYB","JAG1"),pt.size = 2)+
DimPlot(object = acc1_cancer_cells,group.by = c("hpv33_positive"),pt.size = 2)
SetIdent(object = acc1_cancer_cells,value = "seurat_clusters")
An object of class Seurat 80474 features across 134 samples within 2 assays Active assay: integrated (40237 features, 15000 variable features) 1 other assay present: RNA 2 dimensional reductions calculated: pca, umap
deg = FindAllMarkers(object = acc1_cancer_cells,features = VariableFeatures(acc1_cancer_cells),densify = T,verbose = F)
for (cluster in unique(deg$cluster)) {
print_tab(plt = deg[deg$cluster == cluster,],title = "DEG" ,subtitle_num = toc_tabs_level)
}
NA
for (cluster in unique(deg$cluster)) {
deg_of_cluster = deg[deg$cluster == cluster,]
# print(deg_of_cluster %in% original_myo_genes %>% which())
# print(deg_of_cluster %in% original_lum_genes %>% which())
print_tab(plt =
enrichment_analysis(deg_of_cluster,background =VariableFeatures(acc1_cancer_cells),fdr_Cutoff = 0.01,ident.1 =
paste("Cluster",cluster),ident.2 = paste("Cluster",cluster),show_by = 1)
,title = cluster,subtitle_num = toc_tabs_level)
}
from cnmf import cNMF
AttributeError: module 'matplotlib' has no attribute 'get_data_path'
knitr::include_graphics("./Data/cNMF/HMSC_cNMF_harmony_2Kvargenes/HMSC_cNMF_harmony_2Kvargenes.k_selection.png")
selected_k = 3
density_threshold = 0.1
cnmf_obj.consensus(k=selected_k, density_threshold=density_threshold,show_clustering=True)
usage_norm, gep_scores, gep_tpm, topgenes = cnmf_obj.load_results(K=selected_k, density_threshold=density_threshold)
gep_scores = py$gep_scores
gep_tpm = py$gep_tpm
all_metagenes= py$usage_norm
# Make metagene names
for (i in 1:ncol(all_metagenes)) {
colnames(all_metagenes)[i] = "metagene." %>% paste0(i)
}
#add each metagene to metadata
for (i in 1:ncol(all_metagenes)) {
metage_metadata = all_metagenes %>% select(i)
acc1_cancer_cells = AddMetaData(object = acc1_cancer_cells,metadata = metage_metadata)
}
Note: Using an external vector in selections is ambiguous. ℹ Use
all_of(i) instead of i to silence this
message. ℹ See https://tidyselect.r-lib.org/reference/faq-external-vector.html.
This message is displayed once per session.
print_tab(plt =
FeaturePlot(object = acc1_cancer_cells,features = colnames(all_metagenes),combine = T),
title = "metagenes expression",subtitle_num = toc_tabs_level)
#add each metagene to metadata
for (i in 1:ncol(all_metagenes)) {
metage_metadata = all_metagenes %>% select(i)
lung_corr_nonneg = AddMetaData(object = lung_corr_nonneg,metadata = metage_metadata)
}
print_tab(plt =
FeaturePlot(object = lung_corr_nonneg,features = colnames(all_metagenes),combine = T),
title = "metagenes expression",subtitle_num = toc_tabs_level)
NA
# canonical_pathways = msigdbr(species = "Homo sapiens", category = "C2") %>% dplyr::filter(gs_subcat != "CGP") %>% dplyr::distinct(gs_name, gene_symbol)
plt_list = list()
for (i in 1:ncol(gep_scores)) {
top_genes = gep_scores %>% arrange(desc(gep_scores[i])) #sort by score a
top = head(rownames(top_genes),200) #take top top_genes_num
res = genes_vec_enrichment(genes = top,background = rownames(gep_scores),homer = T,title =
i,silent = T,return_all = T,custom_pathways = canonical_pathways)
plt_list[[i]] = res$plt
}
gridExtra::grid.arrange(grobs = plt_list)
for (i in 1:ncol(gep_scores)) {
ranked_vec = gep_scores %>% pull(i) %>% setNames(rownames(gep_scores))
hyp_obj <-hypeR_fgsea(signature = ranked_vec,genesets = genesets,up_only = T)
plt = hyp_dots(hyp_obj,merge = F)+ aes(size=abs(nes))
print(plt)
}
for (i in 1:ncol(gep_scores)) {
gep_scores = gep_scores %>% arrange(desc(gep_scores[i]))
ranked_vec = gep_scores %>% pull(i) %>% setNames(rownames(gep_scores))
hyp_obj <-hypeR_fgsea(signature = ranked_vec,genesets = genesets,up_only = T)
plt = hyp_dots(hyp_obj,merge = F)
print(plt)
}
library(ComplexHeatmap)
acc1_cancer_cells = SetIdent(object = acc1_cancer_cells,value = "seurat_clusters")
for (i in 1:ncol(gep_scores)) {
top_genes = gep_scores %>% arrange(desc(gep_scores[i])) #sort by score a
top = head(rownames(top_genes),50) #take top top_genes_num
data = FetchData(object = acc1_cancer_cells,vars = top)%>% scale() %>% t()
metagene_data = FetchData(object = acc1_cancer_cells,vars = colnames(all_metagenes)[i])
col_list = list(circlize::colorRamp2(c(0, 1), c("white", "red"))); names(col_list) = colnames(all_metagenes)[i]
column_ha = HeatmapAnnotation(df = metagene_data,col = col_list)
pdf(paste0("./Figures/NMF_top_genes_program",i,".pdf"))
print(ComplexHeatmap::Heatmap(data,show_column_names = F,row_names_gp = grid::gpar(fontsize = 7),cluster_rows = F, top_annotation = column_ha,name = "Z-score expression"))
dev.off()
}
original_myo_genes = c( "TP63", "TP73", "CAV1", "CDH3", "KRT5", "KRT14", "ACTA2", "TAGLN", "MYLK", "DKK3")
original_lum_genes = c("KIT", "EHF", "ELF5", "KRT7", "CLDN3", "CLDN4", "CD24", "LGALS3", "LCN2", "SLPI" )
FeaturePlot(acc1_cancer_cells,features = original_myo_genes)
FeaturePlot(acc1_cancer_cells,features = original_lum_genes)
all_acc_cancer_cells = SetIdent(all_acc_cancer_cells,value = "patient.ident")
calculate_score(dataset = all_acc_cancer_cells,myo_genes = original_myo_genes,lum_genes = original_lum_genes)
myo_expr = FetchData(object = acc1_cancer_cells,vars = c(original_myo_genes))
myo_cor = cor(myo_expr)
pheatmap(myo_cor)
myo_expr = FetchData(object = acc1_cancer_cells,vars = c(original_lum_genes))
myo_cor = cor(myo_expr)
pheatmap(myo_cor)
myo_expr = FetchData(object = acc_cancerCells_noACC1,vars = c(original_myo_genes))
myo_cor = cor(myo_expr)
pheatmap(myo_cor)
myo_expr = FetchData(object = acc_cancerCells_noACC1,vars = c(top_lum,lum))
myo_cor = cor(myo_expr)
pheatmap(myo_cor)
lum = c("LCN2","CLDN3","ELF5","MMP7", "GABRP","CALML5","EFNA5")
myo = myo_enriched_genes
trace(calculate_score, edit = T)
calculate_score(dataset = acc1_cancer_cells,myo_genes = original_myo_genes,lum_genes = original_lum_genes,lum_threshold = 0,myo_threshold = 0)
cd_features <- list(original_myo_genes)
acc1_cancer_cells <- AddModuleScore(
object = acc1_cancer_cells,
features = cd_features,
ctrl = 5,
name = 'myo_Features'
)
FeaturePlot(object = acc1_cancer_cells,features = "myo_Features1")
cd_features <- list(c("KRT7", "LGALS3" ,"LCN2" , "SLPI"))
acc1_cancer_cells <- AddModuleScore(
object = acc1_cancer_cells,
features = cd_features,
ctrl = 5,
name = 'lum_Features'
)
FeaturePlot(object = acc1_cancer_cells,features = "lum_Features1")
cor(acc1_cancer_cells$lum_Feoriginal_myo_genesatures1,acc1_cancer_cells$myo_Features1)
myo_protein_markers = c("CNN1", "TP63","ACTA2")
top_myo = top_correlated(dataset = acc1_cancer_cells, genes = myo_protein_markers,threshold = 0.35)
print("Number of genes = " %>% paste(length(top_myo)))
message("Names of genes:")
top_myo %>% head(30)
message("Genes that also apeared in the original score:")
base::intersect(top_myo,original_myo_genes)
myo_enrich_res = genes_vec_enrichment(genes = top_myo,background = rownames(acc1_cancer_cells),homer = T,title = "myo top enrichment",custom_pathways = luminal_gs)
myo_enrich_res
lum_protein_markers = c("KIT")
top_lum = top_correlated(dataset = acc1_cancer_cells, genes = lum_protein_markers,threshold = 0.30,n_vargenes = 5000)
print("Number of genes = " %>% paste(length(top_lum)))
message("Names of genes:")
top_lum %>% head(30)
message("Genes that also apeared in the original score:")
base::intersect(top_lum,original_lum_genes)
lum_enrich_res = genes_vec_enrichment(genes = top_lum,background = rownames(acc1_cancer_cells),homer = T,title = "lum top enrichment",custom_pathways = luminal_gs)
lum_enrich_res
rownames(myo_enrich_res) = myo_enrich_res$pathway_name
myo_enriched_genes = myo_enrich_res[1,"geneID"] %>% strsplit(split = "/") %>% .[[1]] %>% c(.,myo_protein_markers) #add original markers
myoscore=FetchData(object =acc1_cancer_cells,vars = myo_enriched_genes,slot = "data") %>% rowMeans()
acc1_cancer_cells=AddMetaData(acc1_cancer_cells,myoscore,"myo_score")
myoscore=FetchData(object =acc1_cancer_cells,vars = top_myo,slot = "data") %>% rowMeans()
acc1_cancer_cells=AddMetaData(acc1_cancer_cells,myoscore,"myo_score")
HPV33_P3 = fread("./Data/HPV33_P3.txt",col.names = c("plate","reads")) %>% as.data.frame()
HPV33_P3.df = HPV33_P3 %>% mutate(
plate = gsub(x =HPV33_P3$plate, replacement = "",pattern = "_.*$")
%>% gsub(pattern = "-P",replacement = ".P")
%>% gsub(pattern = "-",replacement = "_",)
)
HPV33_P3.df = HPV33_P3.df %>% dplyr::filter(HPV33_P3.df$plate %in% colnames(acc1_cancer_cells))
rownames(HPV33_P3.df) <- HPV33_P3.df$plate
HPV33_P3.df$plate = NULL
HPV33_P2 = fread("./Data/HPV33_P2.txt",col.names = c("plate","reads")) %>% as.data.frame()
HPV33_P2.df = HPV33_P2 %>% mutate(
plate = gsub(x =HPV33_P2$plate, replacement = "",pattern = "_.*$")
%>% gsub(pattern = "plate2-",replacement = "plate2_",)
%>% gsub(pattern = "-",replacement = "\\.",)
)
HPV33_P2.df = HPV33_P2.df %>% dplyr::filter(HPV33_P2.df$plate %in% colnames(acc1_cancer_cells))
rownames(HPV33_P2.df) <- HPV33_P2.df$plate
HPV33_P2.df$plate = NULL
HPV33 = rbind(HPV33_P3.df,HPV33_P2.df)
acc1_cancer_cells = AddMetaData(object = acc1_cancer_cells,metadata = HPV33,col.name = "HPV33.reads")
FeaturePlot(acc1_cancer_cells,features = "HPV33.reads",max.cutoff = 10)
data = FetchData(object = acc1_cancer_cells,vars = "HPV33.reads")
data = data %>% mutate("0 reads" = if_else(condition = HPV33.reads == 0,true = 1,false = 0))
data = data %>% mutate("1 reads" = if_else(condition = HPV33.reads == 1,true = 1,false = 0))
data = data %>% mutate("2 reads" = if_else(condition = HPV33.reads == 2,true = 1,false = 0))
data = data %>% mutate("3-23 reads" = if_else(condition = HPV33.reads >=3 &HPV33.reads <24,true = 1,false = 0))
data = data %>% mutate("24+ reads" = if_else(condition = HPV33.reads >=24,true = 1,false = 0))
data = colSums(data[,2:ncol(data)]) %>% as.data.frame()
names(data)[1] = "count"
data = rownames_to_column(data,var = "bin")
data
ggplot(data=data, aes(x=factor(bin,levels = c("0 reads","1 reads","2 reads","3-23 reads","24+ reads")), y=count)) +
geom_bar(stat="identity", fill="steelblue") + xlab("HPV Reads")+ theme_minimal()+
geom_text(aes(label=count), vjust=-0.5, color="black", size=3.5)
hpv33_positive = HPV33 %>% dplyr::mutate(hpv33_positive = case_when(reads >= 10 ~ "positive",
reads < 10 ~ "negative")
)
hpv33_positive$reads = NULL
acc1_cancer_cells = AddMetaData(object = acc1_cancer_cells,metadata = hpv33_positive)
DimPlot(object = acc1_cancer_cells,group.by = c("hpv33_positive"),pt.size = 2)
library(biomaRt)
ensembl = useEnsembl(biomart="ensembl", dataset="hsapiens_gene_ensembl")
all_up_genes
# library("biomaRt")
# mart <- useMart(biomart="ensembl", dataset="hsapiens_gene_ensembl")
# all_coding_genes <- getBM(attributes = c( "hgnc_symbol"), filters = c("biotype"), values = list(biotype="protein_coding"), mart = mart)
acc1_cancer_cells = FindVariableFeatures(acc1_cancer_cells,nfeatures = 15000)
acc1_cancer_cells = SetIdent(acc1_cancer_cells, value ="hpv33_positive")
features = VariableFeatures(acc1_cancer_cells)
acc_deg <-
FindMarkers(
acc1_cancer_cells,
ident.1 = "positive",
ident.2 = "negative",
features = features,
densify = T,
assay = "RNA",
test.use = "LR",
latent.vars = "plate",
logfc.threshold = 0.25,
min.pct = 0.15,
mean.fxn = function(x) {
return(log(rowMeans(x) + 1, base = 2)) # change func to calculate logFC in log space data (default to exponent data)
}
)
acc_deg$fdr<-p.adjust(p = as.vector(acc_deg$p_val) ,method = "fdr" )
ranked_vec = acc_deg[,"avg_log2FC"]%>% setNames(rownames(acc_deg)) %>% na.omit() # make named vector
hyp_obj <-hypeR_fgsea(signature = ranked_vec,genesets = genesets,up_only = F)
hyp_dots(hyp_obj,merge = F)
acc_deg
acc_deg[c("MYB","JAG1"),]
NA
volcano_plot(de_genes = acc_deg,fc_cutoff = 1.3, fdr_cutoff = 0.1,show_gene_names = c("MYB","JAG1"),ident1 = "HPV33 positive",ident2 = "HPV33 negative",top_genes_text = 5)
# library("biomaRt")
# mart <- useMart(biomart="ensembl", dataset="hsapiens_gene_ensembl")
# all_coding_genes <- getBM(attributes = c( "hgnc_symbol"), filters = c("biotype"), values = list(biotype="protein_coding"), mart = mart)
acc1_cancer_cells = FindVariableFeatures(acc1_cancer_cells,nfeatures = 15000)
acc1_cancer_cells = SetIdent(acc1_cancer_cells, value ="hpv33_positive")
features = VariableFeatures(acc1_cancer_cells)
acc_deg <-
FindMarkers(
acc1_cancer_cells,
ident.1 = "positive",
ident.2 = "negative",
features = features,
densify = T,
assay = "RNA",
test.use = "LR",
latent.vars = "plate",
logfc.threshold = 0.5,
min.pct = 0.15,
mean.fxn = function(x) {
return(rowMeans(x) + 1) # change func to calculate logFC in log space data (default to exponent data)
}
)
acc_deg$fdr<-p.adjust(p = as.vector(acc_deg$p_val) ,method = "fdr" )
acc_deg
acc_deg[c("MYB","JAG1"),]
NA
volcano_plot(de_genes = acc_deg,fc_cutoff = 1.3, fdr_cutoff = 0.1,show_gene_names = c("MYB","JAG1"),ident1 = "HPV33 positive",ident2 = "HPV33 negative",top_genes_text = 5)+xlab("avg diff")
notch_genes = c("JAG1","JAG2","NOTCH3","NOTCH2","NOTCH1","DLL1","MYB","HES4","HEY1","HEY2","NRARP","myo_score")
for (gene in notch_genes) {
myb_vs_hpv = FetchData(object = acc1_cancer_cells,vars = c("hpv33_positive",gene))
myb_vs_hpv$hpv33_positive = paste("HPV33",myb_vs_hpv$hpv33_positiv)
p = ggboxplot(myb_vs_hpv, x = "hpv33_positive", y = gene,
palette = "jco",
add = "jitter")+ stat_compare_means(method = "wilcox.test",comparisons = list(c("HPV33 positive","HPV33 negative")))+ stat_summary(fun.data = function(x) data.frame(y=max(x)*1.2, label = paste("Mean=",round(mean(x),digits = 2))), geom="text") +ylab("log2(gene)")+ggtitle(gene)
print_tab(p,title = gene)
}
myb_vs_hpv = FetchData(object = acc1_cancer_cells,vars = c("hpv33_positive",c("JAG1","JAG2")))
myb_vs_hpv$hpv33_positive = paste("HPV33",myb_vs_hpv$hpv33_positiv)
df = melt(myb_vs_hpv)
ggboxplot(df, x = "variable", y = "value",color = "hpv33_positive",
palette = "jco",
add = "jitter") + stat_compare_means(method = "t.test", ref.group = ".all.") & stat_summary(fun.data = function(x) data.frame(y=max(x)*1.2, label = paste("Mean=",round(mean(x),digits = 2))), geom="text")
notch_targets = c("NOTCH3","HES4","HEY1","HEY2","NRARP")
notch_ligand = c("JAG1","JAG2","DLL1")
notch_genes = list(notch_targets = notch_targets,notch_ligand = notch_ligand)
for (i in 1:length(notch_genes)) {
genes = notch_genes[[i]]
name = names( notch_genes)[i]
myb_vs_hpv = FetchData(object = acc1_cancer_cells,vars = c(genes)) %>% rowMeans()
myb_vs_hpv = myb_vs_hpv %>% cbind(FetchData(object = acc1_cancer_cells,vars = c("hpv33_positive")))
colnames(myb_vs_hpv)[1] = "gene_set"
myb_vs_hpv$hpv33_positive = paste("HPV33",myb_vs_hpv$hpv33_positiv)
p = ggboxplot(myb_vs_hpv, x = "hpv33_positive", y = "gene_set",
palette = "jco",
add = "jitter")+ stat_compare_means(method = "wilcox.test",comparisons = list(c("HPV33 positive","HPV33 negative")))+ stat_summary(fun.data = function(x) data.frame(y=max(x)*1.2, label = paste("Mean=",round(mean(x),digits = 2))), geom="text") +ylab("log2(gene)")+ggtitle(name)
print(p)
}
cor_data = FetchData(object = acc1_cancer_cells,vars = c("MYB","myo_score"))
ggplot(cor_data, aes(x=MYB, y=myo_score)) +
stat_cor(method = "pearson")+
geom_smooth(method=lm) +
geom_point()
cor_data = FetchData(object = acc1_cancer_cells,vars = c("JAG1","myo_score"))
ggplot(cor_data, aes(x=JAG1, y=myo_score)) +
stat_cor(method = "pearson")+
geom_smooth(method=lm) +
geom_point()
cor_data = FetchData(object = acc1_cancer_cells,vars = c("JAG2","myo_score"))
ggplot(cor_data, aes(x=JAG2, y=myo_score)) +
stat_cor(method = "pearson")+
geom_smooth(method=lm) +
geom_point()
cor_data = FetchData(object = acc1_cancer_cells,vars = c("DLL1","myo_score"))
ggplot(cor_data, aes(x=DLL1, y=myo_score)) +
stat_cor(method = "pearson")+
geom_smooth(method=lm) +
geom_point()
cor_data = FetchData(object = acc1_cancer_cells,vars = notch_targets) %>% rowMeans()
cor_data = cor_data %>% cbind(FetchData(object = acc1_cancer_cells,vars = c("myo_score")))
colnames(cor_data)[1] = "notch_targets"
ggplot(cor_data, aes(x=notch_targets, y=myo_score)) +
stat_cor(method = "pearson")+
geom_smooth(method=lm) +
geom_point()
cor_data = FetchData(object = acc1_cancer_cells,vars = notch_ligand) %>% rowMeans()
cor_data = cor_data %>% cbind(FetchData(object = acc1_cancer_cells,vars = c("myo_score")))
colnames(cor_data)[1] = "notch_ligand"
ggplot(cor_data, aes(x=notch_ligand, y=myo_score)) +
stat_cor(method = "pearson")+
geom_smooth(method=lm) +
geom_point()
notch_score = FetchData(object = all_acc_cancer_cells,vars = notch_targets) %>% rowMeans()
all_acc_cancer_cells = AddMetaData(object = all_acc_cancer_cells,metadata = notch_score,col.name = "notch_score")
FeaturePlot(object = all_acc_cancer_cells,features = "notch_score" )
myo_markers = c("TP63", "TP73", "KRT14", "CDH3")
score = FetchData(object = acc1_cancer_cells,vars = myo_markers) %>% rowMeans()
acc1_cancer_cells = AddMetaData(object = acc1_cancer_cells,metadata = score,col.name = "myo_markers_score")
FeaturePlot(object = acc1_cancer_cells,features = "myo_markers_score",pt.size = 2 )
markers = c("CLDN3", "ANXA8", "EHF", "KIT")
score = FetchData(object = acc1_cancer_cells,vars = markers) %>% rowMeans()
acc1_cancer_cells = AddMetaData(object = acc1_cancer_cells,metadata = score,col.name = "lum_markers_score")
FeaturePlot(object = acc1_cancer_cells,features = "lum_markers_score" ,pt.size = 2 )